The DEGs are already ordered by smallest to largest adjusted p-value.
star_all <- read.delim("../../results/star/DEGs/LBD_gene_DEGs_FDRq0.05.txt",
header = TRUE, sep = "\t")
star_XX <- read.delim("../../results/star/DEGs/LBD_gene_DEGs_FDRq0.05_XX.txt",
header = TRUE, sep = "\t")
star_XY <- read.delim("../../results/star/DEGs/LBD_gene_DEGs_FDRq0.05_XY.txt",
header = TRUE, sep = "\t")
sort = FALSE so that the order of the DEGs remains. The DEGs are ordered by adjusted p-value May also order by log2FC
# sort by fold change. Biggest fold change is the first gene
star_all_sort_positiveFC <- star_XY[order(-star_all$adj.P.Val),]
# now repeat for negative fold changes
star_all_sort_negativeFC <- star_XY[order(star_all$adj.P.Val),]
star_XX_sort_positiveFC <- star_XX[order(-star_XX$adj.P.Val),]
star_XX_sort_negativeFC <- star_XX[order(star_XX$adj.P.Val),]
star_XY_sort_positiveFC <- star_XY[order(-star_XY$adj.P.Val),]
star_XY_sort_negativeFC <- star_XY[order(star_XY$adj.P.Val),]
# when subsetting, keep the order. I.e sort = FALSE. Do not resort
up_star_all <- subset(star_all_sort_positiveFC$gene_name,
star_all_sort_positiveFC$logFC > 0, sort = FALSE)
down_star_all <- subset(star_all_sort_negativeFC$gene_name,
star_all_sort_negativeFC$logFC < 0, sort = FALSE)
up_star_XX <- subset(star_XX_sort_positiveFC$gene_name,
star_XX_sort_positiveFC$logFC > 0, sort = FALSE)
down_star_XX <- subset(star_XX_sort_negativeFC$gene_name,
star_XX_sort_negativeFC$logFC < 0, sort = FALSE)
up_star_XY <- subset(star_XY_sort_positiveFC$gene_name,
star_XY_sort_positiveFC$logFC > 0, sort = FALSE)
down_star_XY <- subset(star_XY_sort_negativeFC$gene_name,
star_XY_sort_negativeFC$logFC < 0, sort = FALSE)
ordered_query = TRUE specify that the genes are in a meaningful order. Ordered by adjusted p-value.
gp_up <- gost(up_star_all, ordered_query = TRUE, organism = "hsapiens")
gp_down <- gost(down_star_all, ordered_query = TRUE, organism = "hsapiens")
# inspect
head(gp_up$result)
## query significant p_value term_size query_size intersection_size
## 1 query_1 TRUE 0.0007260184 2428 31 15
## 2 query_1 TRUE 0.0017305449 736 24 8
## 3 query_1 TRUE 0.0171855004 704 24 7
## 4 query_1 TRUE 0.0171855004 704 24 7
## 5 query_1 TRUE 0.0184989682 712 24 7
## 6 query_1 TRUE 0.0387070894 4784 21 14
## precision recall term_id source term_name
## 1 0.4838710 0.006177924 GO:0007399 GO:BP nervous system development
## 2 0.3333333 0.010869565 GO:0099536 GO:BP synaptic signaling
## 3 0.2916667 0.009943182 GO:0098916 GO:BP anterograde trans-synaptic signaling
## 4 0.2916667 0.009943182 GO:0007268 GO:BP chemical synaptic transmission
## 5 0.2916667 0.009831461 GO:0099537 GO:BP trans-synaptic signaling
## 6 0.6666667 0.002926421 GO:0048731 GO:BP system development
## effective_domain_size source_order parents
## 1 21029 3278 GO:0048731
## 2 21029 22351 GO:0007267
## 3 21029 22182 GO:0099537
## 4 21029 3161 GO:0098916
## 5 21029 22352 GO:0099536
## 6 21029 15045 GO:0007275, GO:0048856
head(gp_down$result)
## query significant p_value term_size query_size intersection_size
## 1 query_1 TRUE 0.001085487 16 2 2
## 2 query_1 TRUE 0.001560167 148 105 8
## 3 query_1 TRUE 0.002288569 23 2 2
## 4 query_1 TRUE 0.003672565 29 2 2
## 5 query_1 TRUE 0.012465010 2 53 2
## 6 query_1 TRUE 0.012465010 53 2 2
## precision recall term_id source
## 1 1.00000000 0.12500000 GO:0015671 GO:BP
## 2 0.07619048 0.05405405 GO:0002181 GO:BP
## 3 1.00000000 0.08695652 GO:0015669 GO:BP
## 4 1.00000000 0.06896552 GO:0042744 GO:BP
## 5 0.03773585 1.00000000 GO:0002752 GO:BP
## 6 1.00000000 0.03773585 GO:0042743 GO:BP
## term_name
## 1 oxygen transport
## 2 cytoplasmic translation
## 3 gas transport
## 4 hydrogen peroxide catabolic process
## 5 cell surface pattern recognition receptor signaling pathway
## 6 hydrogen peroxide metabolic process
## effective_domain_size source_order parents
## 1 21029 5357 GO:0015669
## 2 21029 850 GO:0006412
## 3 21029 5355 GO:0006810
## 4 21029 11933 GO:0042743, GO:0044248
## 5 21029 1387 GO:0002220, GO:0002221
## 6 21029 11932 GO:0072593
## Warning: `gather_()` was deprecated in tidyr 1.2.0.
## Please use `gather()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.
## png
## 2
## png
## 2
multi_gp = gost(list("down-regulated" = down_star_all,
"up-regulated" = up_star_all), ordered_query = TRUE,
organism = "hsapiens")
# rearrange so that up-regualted is on top
neworder <- c("up-regulated","down-regulated")
multi_gp$result <- arrange(transform(multi_gp$result,
query=factor(query,levels=neworder)),query)
# interactive plot
gostplot(multi_gp, interactive = TRUE)
#gp_up <- gost(up_star_XX, ordered_query = TRUE, organism = "hsapiens")
gp_down <- gost(down_star_XX, ordered_query = TRUE, organism = "hsapiens")
# inspect
#head(gp_up$result)
head(gp_down$result)
## query significant p_value term_size query_size intersection_size
## 1 query_1 TRUE 0.02240358 9 1 1
## 2 query_1 TRUE 0.02240358 9 1 1
## 3 query_1 TRUE 0.02240358 9 1 1
## precision recall term_id source term_name
## 1 1 0.1111111 HPA:0070000 HPA cartilage
## 2 1 0.1111111 HPA:0070111 HPA cartilage; chondrocytes[≥Low]
## 3 1 0.1111111 HPA:0070112 HPA cartilage; chondrocytes[≥Medium]
## effective_domain_size source_order parents
## 1 10976 86 HPA:0000000
## 2 10976 87 HPA:0070000
## 3 10976 88 HPA:0070111
interactive = TRUE means the plot is interactive. You can select a point of interest to see what it is.
interactive = FALSE is needed when saving the plot capped = FALSE indicates if the Y-asix p-value should be capped at 16 or not.
## No results to show
## Please make sure that the organism or namespace is correct
gp_up <- gost(up_star_XY, ordered_query = TRUE, organism = "hsapiens")
gp_down <- gost(down_star_XY, ordered_query = TRUE, organism = "hsapiens")
# inspect
head(gp_up$result)
## query significant p_value term_size query_size intersection_size
## 1 query_1 TRUE 7.951616e-04 2428 35 16
## 2 query_1 TRUE 1.493668e-02 736 31 8
## 3 query_1 TRUE 3.207755e-02 52 3 2
## 4 query_1 TRUE 2.918933e-05 1305 31 12
## 5 query_1 TRUE 8.451554e-05 633 53 11
## 6 query_1 TRUE 3.441771e-04 1347 43 13
## precision recall term_id source term_name
## 1 0.4571429 0.006589786 GO:0007399 GO:BP nervous system development
## 2 0.2580645 0.010869565 GO:0099536 GO:BP synaptic signaling
## 3 0.6666667 0.038461538 GO:0048013 GO:BP ephrin receptor signaling pathway
## 4 0.3870968 0.009195402 GO:0045202 GO:CC synapse
## 5 0.2075472 0.017377567 GO:0030424 GO:CC axon
## 6 0.3023256 0.009651076 GO:0043005 GO:CC neuron projection
## effective_domain_size source_order parents
## 1 21029 3278 GO:0048731
## 2 21029 22351 GO:0007267
## 3 21029 14506 GO:0007169
## 4 21672 2443 GO:0030054
## 5 21672 1098 GO:0043005
## 6 21672 2090 GO:0120025
head(gp_down$result)
## query significant p_value term_size query_size intersection_size
## 1 query_1 TRUE 0.0006647771 148 94 8
## 2 query_1 TRUE 0.0010854871 16 2 2
## 3 query_1 TRUE 0.0022885685 23 2 2
## 4 query_1 TRUE 0.0036725645 29 2 2
## 5 query_1 TRUE 0.0106377731 2 49 2
## 6 query_1 TRUE 0.0124650097 53 2 2
## precision recall term_id source
## 1 0.08510638 0.05405405 GO:0002181 GO:BP
## 2 1.00000000 0.12500000 GO:0015671 GO:BP
## 3 1.00000000 0.08695652 GO:0015669 GO:BP
## 4 1.00000000 0.06896552 GO:0042744 GO:BP
## 5 0.04081633 1.00000000 GO:0002752 GO:BP
## 6 1.00000000 0.03773585 GO:0042743 GO:BP
## term_name
## 1 cytoplasmic translation
## 2 oxygen transport
## 3 gas transport
## 4 hydrogen peroxide catabolic process
## 5 cell surface pattern recognition receptor signaling pathway
## 6 hydrogen peroxide metabolic process
## effective_domain_size source_order parents
## 1 21029 850 GO:0006412
## 2 21029 5357 GO:0015669
## 3 21029 5355 GO:0006810
## 4 21029 11933 GO:0042743, GO:0044248
## 5 21029 1387 GO:0002220, GO:0002221
## 6 21029 11932 GO:0072593
## png
## 2
## png
## 2
multi_gp = gost(list("down-regulated" = down_star_XY,
"up-regulated" = up_star_XY), ordered_query = TRUE,
organism = "hsapiens")
# rearrange so that up-regualted is on top
neworder <- c("up-regulated","down-regulated")
multi_gp$result <- arrange(transform(multi_gp$result,
query=factor(query,levels=neworder)),query)
# interactive plot
gostplot(multi_gp, interactive = TRUE)